{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import time\n",
    "import urllib\n",
    "import urllib.request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# HTTP headers attached to every page request below.\n",
    "# The User-Agent value is an MSIE 5.5 / Windows NT browser identification string.\n",
    "user_agent = \"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)\"\n",
    "headers = {\"User-Agent\": user_agent}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "# Scrape the ZOL phone listing: one dict of specs per 'list-item' div, collected in `s`.\n",
    "URL_TEMPLATE = \"http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_{0}.html\"\n",
    "LAST_PAGE = 30       # highest page number the original range(1, 31) covered\n",
    "PAGES_TO_FETCH = 1   # the loop used to stop after page 1 via a bare `break`; raise to LAST_PAGE for a full crawl\n",
    "REQUEST_TIMEOUT = 30 # seconds; without a timeout a stalled connection blocks the cell forever\n",
    "\n",
    "s = []\n",
    "for i in range(1, min(PAGES_TO_FETCH, LAST_PAGE) + 1):\n",
    "    time.sleep(random.randint(1,5))  # random 1-5 second pause before each request\n",
    "    print(i)\n",
    "    url = URL_TEMPLATE.format(i)\n",
    "    req = urllib.request.Request(url,headers=headers)\n",
    "    # The `with` block closes the HTTP response; the parser is named explicitly so the\n",
    "    # parse tree does not depend on which optional parsers happen to be installed.\n",
    "    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:\n",
    "        bs = BeautifulSoup(resp.read(), \"html.parser\")\n",
    "\n",
    "    for t in bs.find_all(\"div\",class_='list-item'):\n",
    "        v = {}\n",
    "\n",
    "        # Model name: text of a link whose href contains \"cell\" (not at index 0)\n",
    "        # and contains no \"http\". Anchors without an href are skipped.\n",
    "        for t2 in t.find_all(\"div\",class_=\"pro-intro\"):\n",
    "            for ta in t2.find_all(\"a\"):\n",
    "                href = ta.get(\"href\", \"\")\n",
    "                if href.find(\"cell\") > 0 and href.find(\"http\") < 0:\n",
    "                    v[\"品牌型号\"] = \"{0}\".format(ta.string)\n",
    "\n",
    "        # Spec list: the <span> text minus its last character is the key, the <li> title is the value.\n",
    "        # NOTE(review): the dropped character is presumably a trailing colon - confirm against the page.\n",
    "        for t3 in t.find_all(\"ul\",class_=\"param clearfix\"):\n",
    "            for t4 in t3.find_all(\"li\"):\n",
    "                label = t4.span.string if t4.span is not None else None\n",
    "                if label is None or not t4.has_attr(\"title\"):\n",
    "                    continue  # malformed item: skip it instead of aborting the whole crawl\n",
    "                v[label[:-1]] = t4[\"title\"]\n",
    "\n",
    "        # Price: if several 'price-type' tags exist, the last one wins (same as before).\n",
    "        for t5 in t.find_all(\"b\",class_=\"price-type\"):\n",
    "            v[\"价格\"] = \"{0}\".format(t5.string)\n",
    "        s.append(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one row per scraped phone from the list of dicts in `s`;\n",
    "# a key that is absent from a phone's dict shows up as NaN in that row.\n",
    "cp = pandas.DataFrame(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>4G网络</th>\n",
       "      <th>CPU型号</th>\n",
       "      <th>CPU频率</th>\n",
       "      <th>RAM容量</th>\n",
       "      <th>ROM容量</th>\n",
       "      <th>主屏尺寸</th>\n",
       "      <th>价格</th>\n",
       "      <th>传感器类型</th>\n",
       "      <th>前置摄像头</th>\n",
       "      <th>后置摄像头</th>\n",
       "      <th>品牌信号</th>\n",
       "      <th>操作系统</th>\n",
       "      <th>电池容量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE</td>\n",
       "      <td>高通 骁龙855</td>\n",
       "      <td>2.84  八核</td>\n",
       "      <td>8GB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.41英寸  2340x1080像素</td>\n",
       "      <td>3298</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1200万像素+1300万像素+200万像素</td>\n",
       "      <td>vivo iQOO（8GB RAM/全网通）</td>\n",
       "      <td>Funtouch OS 9（基于Android 9.0）</td>\n",
       "      <td>4000mAh  不可拆卸式电池</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE</td>\n",
       "      <td>高通 骁龙710</td>\n",
       "      <td>2.2GHz  八核</td>\n",
       "      <td>6GB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.4英寸  2340x1080像素</td>\n",
       "      <td>3999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2000万像素+1200万像素+TOF 3D 立体摄像头</td>\n",
       "      <td>OPPO R17 Pro（6GB RAM/全网通）</td>\n",
       "      <td>ColorOS 5.2（基于Android 8.1）</td>\n",
       "      <td>3700mAh  不可拆卸式电池</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE</td>\n",
       "      <td>海思 Kirin 980</td>\n",
       "      <td>2 x Cortex A76 2.6GHz+2 x Cortex A76 1.92GHz+4...</td>\n",
       "      <td>6GB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.39英寸  3120x1440像素</td>\n",
       "      <td>5399</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4000万像素+2000万像素+800万像素</td>\n",
       "      <td>华为Mate 20 Pro（6GB RAM/全网通）</td>\n",
       "      <td>EMUI 9.0（基于Android 9.0）</td>\n",
       "      <td>4200mAh  不可拆卸式电池</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE</td>\n",
       "      <td>高通 骁龙855</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8GB</td>\n",
       "      <td>128GB/512GB</td>\n",
       "      <td>6.1英寸  3040x1440像素</td>\n",
       "      <td>5999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1600万像素+1200万像素+1200万像素</td>\n",
       "      <td>三星Galaxy S10（全网通）</td>\n",
       "      <td>Android 9.0</td>\n",
       "      <td>3400mAh  不可拆卸式电池</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE</td>\n",
       "      <td>海思 Kirin 980</td>\n",
       "      <td>2×Cortex-A76Based2.6GHz+2×Cortex-A76Based1.92G...</td>\n",
       "      <td>6GB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.4英寸  2310x1080像素</td>\n",
       "      <td>2999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4800万像素+TOF立体深感镜头</td>\n",
       "      <td>荣耀V20（6GB RAM/全网通）</td>\n",
       "      <td>MagicUI2.0.1（基于Android 9.0）</td>\n",
       "      <td>4000mAh  不可拆卸式电池</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             4G网络         CPU型号  \\\n",
       "0  移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE      高通 骁龙855   \n",
       "1  移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE      高通 骁龙710   \n",
       "2  移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE  海思 Kirin 980   \n",
       "3  移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE      高通 骁龙855   \n",
       "4  移动TD-LTE，联通TD-LTE，联通FDD-LTE，电信TD-LTE，电信FDD-LTE  海思 Kirin 980   \n",
       "\n",
       "                                               CPU频率 RAM容量        ROM容量  \\\n",
       "0                                           2.84  八核   8GB          NaN   \n",
       "1                                         2.2GHz  八核   6GB          NaN   \n",
       "2  2 x Cortex A76 2.6GHz+2 x Cortex A76 1.92GHz+4...   6GB          NaN   \n",
       "3                                                NaN   8GB  128GB/512GB   \n",
       "4  2×Cortex-A76Based2.6GHz+2×Cortex-A76Based1.92G...   6GB          NaN   \n",
       "\n",
       "                  主屏尺寸    价格 传感器类型 前置摄像头                         后置摄像头  \\\n",
       "0  6.41英寸  2340x1080像素  3298   NaN   NaN        1200万像素+1300万像素+200万像素   \n",
       "1   6.4英寸  2340x1080像素  3999   NaN   NaN  2000万像素+1200万像素+TOF 3D 立体摄像头   \n",
       "2  6.39英寸  3120x1440像素  5399   NaN   NaN        4000万像素+2000万像素+800万像素   \n",
       "3   6.1英寸  3040x1440像素  5999   NaN   NaN       1600万像素+1200万像素+1200万像素   \n",
       "4   6.4英寸  2310x1080像素  2999   NaN   NaN             4800万像素+TOF立体深感镜头   \n",
       "\n",
       "                         品牌信号                          操作系统              电池容量  \n",
       "0      vivo iQOO（8GB RAM/全网通）  Funtouch OS 9（基于Android 9.0）  4000mAh  不可拆卸式电池  \n",
       "1   OPPO R17 Pro（6GB RAM/全网通）    ColorOS 5.2（基于Android 8.1）  3700mAh  不可拆卸式电池  \n",
       "2  华为Mate 20 Pro（6GB RAM/全网通）       EMUI 9.0（基于Android 9.0）  4200mAh  不可拆卸式电池  \n",
       "3           三星Galaxy S10（全网通）                   Android 9.0  3400mAh  不可拆卸式电池  \n",
       "4          荣耀V20（6GB RAM/全网通）   MagicUI2.0.1（基于Android 9.0）  4000mAh  不可拆卸式电池  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the scraped table as CSV.\n",
    "# Writing the raw list `s` to a text file handle raises TypeError (write() needs a str),\n",
    "# so the DataFrame is exported instead; to_csv also opens and closes the file itself.\n",
    "# The encoding is explicit so the Chinese headers and values do not depend on the platform default.\n",
    "cp.to_csv(\"cellPhone.csv\", index=False, encoding=\"utf-8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
